###### LOADING LIBRARIES #####
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.5 ✓ purrr 0.3.4
## ✓ tibble 3.1.0 ✓ dplyr 1.0.5
## ✓ tidyr 1.1.3 ✓ stringr 1.4.0
## ✓ readr 1.4.0 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(reshape2)
##
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
##
## smiths
library(tidyr)
library(effects)
## Warning: package 'effects' was built under R version 4.0.5
## Loading required package: carData
## Warning: package 'carData' was built under R version 4.0.5
## lattice theme set by effectsTheme()
## See ?effectsTheme for details.
library(ggplot2)
###### SET WORK SPACE AND READ IN FILE ######
setwd("~/Documents/PhD 2020/BeHo:ZiBA study/Data analysis/Tube test data analysis")
tube_test_wins = read.csv("R_input_files/Mice-full-overview.csv", header = TRUE)
###### TRANSFORM DATAFRAME FOR STATISTICS #######
#Placing NAs for dead mice (C8 mouse 3065, C16 mouse 3183, C7 mouse 2730, C3 mouse 3083, C1 mouse 4176)
tube_test_wins[76, c(32:45,48:49)] = NA
tube_test_wins[38, c(32:45,48:49)] = NA
tube_test_wins[32, c(32:45,48:49)] = NA
tube_test_wins[14, c(32:45,48:49)] = NA
tube_test_wins[4,c(28:45,47:49)] = NA
#Removing irrelevant columns
tube_test_80 <- tube_test_wins [,-c(5,8,10,15:17,21:23, 27, 31, 35:37, 39, 42:50)]
#Renaming columns without the many dots
names(tube_test_80)[2] <- "Arrive_weight"
names(tube_test_80)[5] <- "Sex"
names(tube_test_80)[6] <- "Exp_group"
#Transform the dataframe from a wide to a long format
tube_test_80 <- melt(tube_test_80, id.vars = c("Name","Arrive_weight", "Phenotype", "Crate", "Sex", "Exp_group","Cage.ID"))
#Renaming the columns created by melt()
names(tube_test_80)[8] <- "Treatments"
names(tube_test_80)[9] <- "Wins_per_rep"
names(tube_test_80)[7] <- "Cage_ID"
names(tube_test_80)[4] <- "Crate_ID"
#Add a new column with the replicate info and then rename the rows of column Treatments to only the treatments
Replicates <- c(rep("R1",80), rep("R2",80), rep("R3",80),rep("R4",80),rep(c(rep("R1",80), rep("R2",80), rep("R3",80)),4),rep("R3",80), rep("R4",80), rep("R5",80))
#add the column "Replicates" to the dataframe tube_test_80
tube_test_80 <- cbind(tube_test_80,Replicates)
#Changing the name of the factors created after transforming the dataframe. There are 19 in total corresponding to the no of reps in total.
levels(tube_test_80$Treatments) <- c("OPT","OPT","OPT","OPT","HEAT","HEAT","HEAT","COLD","COLD","COLD","DIET","DIET","DIET","ANTI","ANTI","ANTI","FMT","FMT","FMT")
levels(tube_test_80$Exp_group) <- c("Treatment", "Control") #does the glm or lm model then know how to differ from the experimental and control animals tube test wins? Ask A & O this.
#Move the no of wins column (all of it) to the first column
colnames(tube_test_80)
## [1] "Name" "Arrive_weight" "Phenotype" "Crate_ID"
## [5] "Sex" "Exp_group" "Cage_ID" "Treatments"
## [9] "Wins_per_rep" "Replicates"
tube_test_80 <- tube_test_80[, c(9,8,10,1:7)]
#Creating new factors to start statistics. A factor instead of num, chr or int account for variation between the "catagories"
tube_test_80 <- transform(tube_test_80,Replicates=as.factor(Replicates),
Phenotype=as.factor(Phenotype),
Crate=as.factor(Crate_ID),
Sex=as.factor(Sex),
Exp_group=as.factor(Exp_group),
Cage_ID=as.factor(Cage_ID))
str(tube_test_80)
## 'data.frame': 1520 obs. of 11 variables:
## $ Wins_per_rep : int 2 3 0 4 1 4 1 2 3 0 ...
## $ Treatments : Factor w/ 6 levels "OPT","HEAT","COLD",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ Replicates : Factor w/ 5 levels "R1","R2","R3",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ Name : chr "C01M3_4215" "C01M1_3942" "C01M2_2472" "C01M5_4176" ...
## $ Arrive_weight: num 19 19.3 18.4 21.8 24 ...
## $ Phenotype : Factor w/ 6 levels "Black","Brown",..: 1 2 1 5 6 1 2 2 2 6 ...
## $ Crate_ID : chr "Crate1" "Crate1" "Crate3" "Crate3" ...
## $ Sex : Factor w/ 2 levels "Female","Male": 2 2 2 2 2 2 2 2 2 2 ...
## $ Exp_group : Factor w/ 2 levels "Control","Treatment": 2 2 2 2 2 2 2 2 2 2 ...
## $ Cage_ID : Factor w/ 16 levels "01M","02M","03M",..: 1 1 1 1 1 2 2 2 2 2 ...
## $ Crate : Factor w/ 8 levels "Crate1","Crate2",..: 1 1 3 3 3 3 2 2 3 1 ...
#Remove mice that are not among the 40 I'm working with:
#I have mice from cage 3-6 (males) and 10-13 (females)
cages_to_remove <- c("01M","02M","07M","08M","09F","14F","15F","16F")
condition <- tube_test_80$Cage_ID %in% cages_to_remove
tube_test_40 <- tube_test_80[!condition, ]
# Remove rows with missing values (NA) in any column if needing the df like this later
cleaned_tube_test_40 <- drop_na(tube_test_40)
str(tube_test_40)
## 'data.frame': 760 obs. of 11 variables:
## $ Wins_per_rep : int 3 0 3 1 3 0 2 4 1 3 ...
## $ Treatments : Factor w/ 6 levels "OPT","HEAT","COLD",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ Replicates : Factor w/ 5 levels "R1","R2","R3",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ Name : chr "C03M2_1576" "C03M1_3856" "C03M3_4011" "C03M4_3083" ...
## $ Arrive_weight: num 22.2 21 23.9 19 22.6 ...
## $ Phenotype : Factor w/ 6 levels "Black","Brown",..: 2 2 6 6 6 6 6 1 2 6 ...
## $ Crate_ID : chr "Crate5" "Crate5" "Crate1" "Crate3" ...
## $ Sex : Factor w/ 2 levels "Female","Male": 2 2 2 2 2 2 2 2 2 2 ...
## $ Exp_group : Factor w/ 2 levels "Control","Treatment": 2 2 2 2 2 1 1 1 1 1 ...
## $ Cage_ID : Factor w/ 16 levels "01M","02M","03M",..: 3 3 3 3 3 4 4 4 4 4 ...
## $ Crate : Factor w/ 8 levels "Crate1","Crate2",..: 5 5 1 3 2 2 1 5 2 5 ...
######## QUICK VISUALIZATION OF MY DATA (THE DEPENDENT VARIABLE) #####
hist(tube_test_80$Wins_per_rep, breaks=seq(0,4,by=0.2), ylim = c(0,400), xlab="Dominance rank", main="Histogram showing frequency of wins for all tube tests for all 80 mice")

hist(tube_test_40$Wins_per_rep, breaks=seq(0,4,by=0.2), ylim = c(0,200),xlab="Dominance rank", main="Histogram showing frequency of wins for all tube tests for the 40 specific mice")

######## STATISTICS - using glm (Generalized Linear Models) and lm (Multiple Linear Regression Models) ####
glimpse(tube_test_40)
## Rows: 760
## Columns: 11
## $ Wins_per_rep <int> 3, 0, 3, 1, 3, 0, 2, 4, 1, 3, 3, 1, 2, 4, 0, 1, 2, 4, 0,…
## $ Treatments <fct> OPT, OPT, OPT, OPT, OPT, OPT, OPT, OPT, OPT, OPT, OPT, O…
## $ Replicates <fct> R1, R1, R1, R1, R1, R1, R1, R1, R1, R1, R1, R1, R1, R1, …
## $ Name <chr> "C03M2_1576", "C03M1_3856", "C03M3_4011", "C03M4_3083", …
## $ Arrive_weight <dbl> 22.23, 20.96, 23.90, 19.02, 22.55, 21.66, 16.23, 19.66, …
## $ Phenotype <fct> Brown, Brown, White, White, White, White, White, Black, …
## $ Crate_ID <chr> "Crate5", "Crate5", "Crate1", "Crate3", "Crate2", "Crate…
## $ Sex <fct> Male, Male, Male, Male, Male, Male, Male, Male, Male, Ma…
## $ Exp_group <fct> Treatment, Treatment, Treatment, Treatment, Treatment, C…
## $ Cage_ID <fct> 03M, 03M, 03M, 03M, 03M, 04M, 04M, 04M, 04M, 04M, 05M, 0…
## $ Crate <fct> Crate5, Crate5, Crate1, Crate3, Crate2, Crate2, Crate1, …
glimpse(tube_test_80)
## Rows: 1,520
## Columns: 11
## $ Wins_per_rep <int> 2, 3, 0, 4, 1, 4, 1, 2, 3, 0, 3, 0, 3, 1, 3, 0, 2, 4, 1,…
## $ Treatments <fct> OPT, OPT, OPT, OPT, OPT, OPT, OPT, OPT, OPT, OPT, OPT, O…
## $ Replicates <fct> R1, R1, R1, R1, R1, R1, R1, R1, R1, R1, R1, R1, R1, R1, …
## $ Name <chr> "C01M3_4215", "C01M1_3942", "C01M2_2472", "C01M5_4176", …
## $ Arrive_weight <dbl> 19.01, 19.28, 18.44, 21.80, 24.00, 20.20, 17.54, 20.78, …
## $ Phenotype <fct> Black, Brown, Black, LIGHT BROWN, White, Black, Brown, B…
## $ Crate_ID <chr> "Crate1", "Crate1", "Crate3", "Crate3", "Crate3", "Crate…
## $ Sex <fct> Male, Male, Male, Male, Male, Male, Male, Male, Male, Ma…
## $ Exp_group <fct> Treatment, Treatment, Treatment, Treatment, Treatment, T…
## $ Cage_ID <fct> 01M, 01M, 01M, 01M, 01M, 02M, 02M, 02M, 02M, 02M, 03M, 0…
## $ Crate <fct> Crate1, Crate1, Crate3, Crate3, Crate3, Crate3, Crate2, …
### lm (Multiple Linear Regression Models)
#ALL 80 MICE
#Because the Mice IDs are not of relevance here, we exclude the Name column, but test all other variables
lm_result_all <- lm(Wins_per_rep ~ .-Name,data=tube_test_80)
summary(lm_result_all)
##
## Call:
## lm(formula = Wins_per_rep ~ . - Name, data = tube_test_80)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.7766 -0.9772 -0.1031 0.9403 3.0461
##
## Coefficients: (10 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.575308 0.427080 6.030 2.08e-09 ***
## TreatmentsHEAT 0.018433 0.110400 0.167 0.867418
## TreatmentsCOLD 0.051767 0.110400 0.469 0.639212
## TreatmentsDIET 0.038120 0.110802 0.344 0.730866
## TreatmentsANTI -0.109995 0.112421 -0.978 0.328027
## TreatmentsFMT -0.120770 0.130041 -0.929 0.353194
## ReplicatesR2 0.025381 0.088230 0.288 0.773643
## ReplicatesR3 0.004000 0.086977 0.046 0.963325
## ReplicatesR4 0.046027 0.140104 0.329 0.742566
## ReplicatesR5 0.025013 0.200248 0.125 0.900610
## Arrive_weight 0.002154 0.016420 0.131 0.895642
## PhenotypeBrown -1.044612 0.104533 -9.993 < 2e-16 ***
## Phenotypecaramel 0.237762 0.340617 0.698 0.485269
## Phenotypedark brown 1.383046 0.337843 4.094 4.48e-05 ***
## PhenotypeLIGHT BROWN -0.472570 0.211802 -2.231 0.025821 *
## PhenotypeWhite -1.043615 0.108200 -9.645 < 2e-16 ***
## Crate_IDCrate2 -0.370384 0.139157 -2.662 0.007862 **
## Crate_IDCrate3 -0.309293 0.155515 -1.989 0.046908 *
## Crate_IDCrate4 -0.054754 0.240489 -0.228 0.819928
## Crate_IDCrate5 0.269836 0.146165 1.846 0.065081 .
## Crate_IDCrate6 0.262688 0.235866 1.114 0.265586
## Crate_IDCrate7 -0.388380 0.237541 -1.635 0.102266
## Crate_IDCrate8 0.667842 0.246728 2.707 0.006873 **
## SexMale NA NA NA NA
## Exp_groupTreatment -0.313367 0.198050 -1.582 0.113809
## Cage_ID02M 0.722391 0.193997 3.724 0.000204 ***
## Cage_ID03M 0.585602 0.210658 2.780 0.005508 **
## Cage_ID04M 0.264191 0.285637 0.925 0.355162
## Cage_ID05M 0.120988 0.201548 0.600 0.548405
## Cage_ID06M -0.222318 0.202192 -1.100 0.271717
## Cage_ID07M 0.007626 0.202175 0.038 0.969918
## Cage_ID08M 0.785715 0.216479 3.630 0.000294 ***
## Cage_ID09F -0.450626 0.214372 -2.102 0.035719 *
## Cage_ID10F 0.743322 0.201076 3.697 0.000227 ***
## Cage_ID11F -0.054298 0.203884 -0.266 0.790033
## Cage_ID12F 0.361630 0.204612 1.767 0.077372 .
## Cage_ID13F NA NA NA NA
## Cage_ID14F 0.455238 0.201021 2.265 0.023683 *
## Cage_ID15F 0.803178 0.205041 3.917 9.38e-05 ***
## Cage_ID16F NA NA NA NA
## CrateCrate2 NA NA NA NA
## CrateCrate3 NA NA NA NA
## CrateCrate4 NA NA NA NA
## CrateCrate5 NA NA NA NA
## CrateCrate6 NA NA NA NA
## CrateCrate7 NA NA NA NA
## CrateCrate8 NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.238 on 1450 degrees of freedom
## (33 observations deleted due to missingness)
## Multiple R-squared: 0.1691, Adjusted R-squared: 0.1485
## F-statistic: 8.197 on 36 and 1450 DF, p-value: < 2.2e-16
plot(lm_result_all)




#SPCEIFIC 40 MICE ONLY
lm_result_all <- lm(Wins_per_rep ~ .-Name,data=tube_test_40)
summary(lm_result_all)
##
## Call:
## lm(formula = Wins_per_rep ~ . - Name, data = tube_test_40)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.43206 -0.95705 -0.04025 0.87291 2.86905
##
## Coefficients: (10 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.618482 0.694919 5.207 2.50e-07 ***
## TreatmentsHEAT 0.014868 0.154077 0.096 0.92315
## TreatmentsCOLD 0.006535 0.154077 0.042 0.96618
## TreatmentsDIET 0.014868 0.154077 0.096 0.92315
## TreatmentsANTI -0.070838 0.155259 -0.456 0.64834
## TreatmentsFMT -0.058325 0.179371 -0.325 0.74515
## ReplicatesR2 -0.005025 0.122585 -0.041 0.96731
## ReplicatesR3 -0.003941 0.120822 -0.033 0.97399
## ReplicatesR4 -0.018515 0.194456 -0.095 0.92417
## ReplicatesR5 0.001593 0.275172 0.006 0.99538
## Arrive_weight -0.023974 0.026451 -0.906 0.36504
## PhenotypeBrown -1.331121 0.139145 -9.566 < 2e-16 ***
## PhenotypeLIGHT BROWN 0.217656 0.341217 0.638 0.52375
## PhenotypeWhite -1.439426 0.151814 -9.482 < 2e-16 ***
## Crate_IDCrate2 0.561053 0.193550 2.899 0.00386 **
## Crate_IDCrate3 -0.284023 0.269166 -1.055 0.29169
## Crate_IDCrate4 -0.067226 0.337371 -0.199 0.84211
## Crate_IDCrate5 0.484845 0.175304 2.766 0.00582 **
## Crate_IDCrate6 -0.309033 0.291899 -1.059 0.29009
## Crate_IDCrate7 -0.703396 0.310029 -2.269 0.02357 *
## Crate_IDCrate8 0.262489 0.365814 0.718 0.47327
## SexMale NA NA NA NA
## Exp_groupTreatment -0.126818 0.218510 -0.580 0.56184
## Cage_ID04M -0.431109 0.296289 -1.455 0.14609
## Cage_ID05M -0.714613 0.239672 -2.982 0.00296 **
## Cage_ID06M -0.994507 0.222106 -4.478 8.76e-06 ***
## Cage_ID10F 0.518879 0.202433 2.563 0.01057 *
## Cage_ID11F -0.452999 0.235648 -1.922 0.05495 .
## Cage_ID12F NA NA NA NA
## Cage_ID13F NA NA NA NA
## CrateCrate2 NA NA NA NA
## CrateCrate3 NA NA NA NA
## CrateCrate4 NA NA NA NA
## CrateCrate5 NA NA NA NA
## CrateCrate6 NA NA NA NA
## CrateCrate7 NA NA NA NA
## CrateCrate8 NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.223 on 727 degrees of freedom
## (6 observations deleted due to missingness)
## Multiple R-squared: 0.2183, Adjusted R-squared: 0.1903
## F-statistic: 7.807 on 26 and 727 DF, p-value: < 2.2e-16
plot(lm_result_all)




### glm (Generalized Linear Models) using the poisson model
#ALL 80 MICE
#Because the Mice IDs are not of relevance here, we exclude the Name column, but test all other variables
glm_result <- glm(Wins_per_rep ~ .-Name,data=tube_test_80,family = poisson())
summary(glm_result)
##
## Call:
## glm(formula = Wins_per_rep ~ . - Name, family = poisson(), data = tube_test_80)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.42271 -0.77571 -0.08021 0.65960 2.10534
##
## Coefficients: (10 not defined because of singularities)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.985206 0.250896 3.927 8.61e-05 ***
## TreatmentsHEAT 0.009598 0.064018 0.150 0.880826
## TreatmentsCOLD 0.026547 0.063737 0.417 0.677035
## TreatmentsDIET 0.019482 0.064033 0.304 0.760933
## TreatmentsANTI -0.057376 0.066122 -0.868 0.385540
## TreatmentsFMT -0.063008 0.076455 -0.824 0.409875
## ReplicatesR2 0.013072 0.051132 0.256 0.798218
## ReplicatesR3 0.002011 0.050561 0.040 0.968277
## ReplicatesR4 0.024022 0.081529 0.295 0.768262
## ReplicatesR5 0.013077 0.118303 0.111 0.911981
## Arrive_weight 0.001826 0.010085 0.181 0.856304
## PhenotypeBrown -0.542872 0.060904 -8.914 < 2e-16 ***
## Phenotypecaramel 0.124981 0.177132 0.706 0.480448
## Phenotypedark brown 0.445839 0.162741 2.740 0.006152 **
## PhenotypeLIGHT BROWN -0.220393 0.123177 -1.789 0.073577 .
## PhenotypeWhite -0.538552 0.064836 -8.306 < 2e-16 ***
## Crate_IDCrate2 -0.202961 0.084941 -2.389 0.016875 *
## Crate_IDCrate3 -0.177914 0.093150 -1.910 0.056136 .
## Crate_IDCrate4 -0.051602 0.149047 -0.346 0.729180
## Crate_IDCrate5 0.110287 0.084200 1.310 0.190259
## Crate_IDCrate6 0.103837 0.141628 0.733 0.463459
## Crate_IDCrate7 -0.254371 0.149675 -1.699 0.089228 .
## Crate_IDCrate8 0.310743 0.149235 2.082 0.037321 *
## SexMale NA NA NA NA
## Exp_groupTreatment -0.213216 0.122260 -1.744 0.081167 .
## Cage_ID02M 0.403339 0.117941 3.420 0.000627 ***
## Cage_ID03M 0.352764 0.131309 2.687 0.007220 **
## Cage_ID04M 0.089943 0.174109 0.517 0.605445
## Cage_ID05M 0.078818 0.121094 0.651 0.515120
## Cage_ID06M -0.071373 0.121449 -0.588 0.556747
## Cage_ID07M 0.024071 0.125045 0.192 0.847354
## Cage_ID08M 0.449968 0.132245 3.403 0.000668 ***
## Cage_ID09F -0.198612 0.137774 -1.442 0.149422
## Cage_ID10F 0.451013 0.124131 3.633 0.000280 ***
## Cage_ID11F 0.005785 0.124976 0.046 0.963078
## Cage_ID12F 0.209845 0.125971 1.666 0.095748 .
## Cage_ID13F NA NA NA NA
## Cage_ID14F 0.298496 0.123355 2.420 0.015528 *
## Cage_ID15F 0.480633 0.127573 3.767 0.000165 ***
## Cage_ID16F NA NA NA NA
## CrateCrate2 NA NA NA NA
## CrateCrate3 NA NA NA NA
## CrateCrate4 NA NA NA NA
## CrateCrate5 NA NA NA NA
## CrateCrate6 NA NA NA NA
## CrateCrate7 NA NA NA NA
## CrateCrate8 NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 1787.1 on 1486 degrees of freedom
## Residual deviance: 1560.5 on 1450 degrees of freedom
## (33 observations deleted due to missingness)
## AIC: 4872.6
##
## Number of Fisher Scoring iterations: 5
plot(glm_result)




#SPECIFIC 40 MICE ONLY
glm_result <- glm(Wins_per_rep ~ .-Name,data=tube_test_40,family = poisson())
summary(glm_result)
##
## Call:
## glm(formula = Wins_per_rep ~ . - Name, family = poisson(), data = tube_test_40)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.19239 -0.77496 -0.03654 0.58023 2.04397
##
## Coefficients: (10 not defined because of singularities)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.2880107 0.4207857 3.061 0.002206 **
## TreatmentsHEAT 0.0074473 0.0892944 0.083 0.933532
## TreatmentsCOLD 0.0032720 0.0893920 0.037 0.970802
## TreatmentsDIET 0.0074473 0.0892944 0.083 0.933532
## TreatmentsANTI -0.0361338 0.0907774 -0.398 0.690595
## TreatmentsFMT -0.0297379 0.1049425 -0.283 0.776891
## ReplicatesR2 -0.0025349 0.0712018 -0.036 0.971601
## ReplicatesR3 -0.0019785 0.0701821 -0.028 0.977510
## ReplicatesR4 -0.0094148 0.1134252 -0.083 0.933848
## ReplicatesR5 0.0009109 0.1610300 0.006 0.995486
## Arrive_weight -0.0036823 0.0164730 -0.224 0.823119
## PhenotypeBrown -0.6409863 0.0782605 -8.190 2.60e-16 ***
## PhenotypeLIGHT BROWN 0.0787051 0.1718906 0.458 0.647039
## PhenotypeWhite -0.7301891 0.0921219 -7.926 2.26e-15 ***
## Crate_IDCrate2 0.3247412 0.1136101 2.858 0.004258 **
## Crate_IDCrate3 -0.1621914 0.1753692 -0.925 0.355040
## Crate_IDCrate4 -0.0275113 0.2087908 -0.132 0.895170
## Crate_IDCrate5 0.2385385 0.1055724 2.259 0.023854 *
## Crate_IDCrate6 -0.1391767 0.1749053 -0.796 0.426191
## Crate_IDCrate7 -0.3895277 0.1966837 -1.980 0.047650 *
## Crate_IDCrate8 0.1731069 0.2250214 0.769 0.441721
## SexMale NA NA NA NA
## Exp_groupTreatment -0.0607722 0.1359041 -0.447 0.654753
## Cage_ID04M -0.2291182 0.1803810 -1.270 0.204017
## Cage_ID05M -0.3984480 0.1445578 -2.756 0.005846 **
## Cage_ID06M -0.4778446 0.1314431 -3.635 0.000278 ***
## Cage_ID10F 0.2945573 0.1264076 2.330 0.019795 *
## Cage_ID11F -0.2249200 0.1489704 -1.510 0.131087
## Cage_ID12F NA NA NA NA
## Cage_ID13F NA NA NA NA
## CrateCrate2 NA NA NA NA
## CrateCrate3 NA NA NA NA
## CrateCrate4 NA NA NA NA
## CrateCrate5 NA NA NA NA
## CrateCrate6 NA NA NA NA
## CrateCrate7 NA NA NA NA
## CrateCrate8 NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 906.16 on 753 degrees of freedom
## Residual deviance: 753.71 on 727 degrees of freedom
## (6 observations deleted due to missingness)
## AIC: 2471.5
##
## Number of Fisher Scoring iterations: 5
plot(glm_result)




#The glm model results of 80 vs. 40 mice differ
#However, other families may be a better fit for my data, and thus, I read about them using ?glm()
#I also used ChatGPT to help me understand the definition of each family
#Turns out the Quasipoisson could fit my data better than poisson
#So to find out if this is the case, I have to explore my data
#If the variance is larger than the mean in my dataset (=overdispersion), then quasipoisson is a better fit
#You only calculate the variance (sd) and mean of the dependent variable, which in my case is number of wins
#First testing this with all 80 mice
dependent_variable_wins <- tube_test_80$Wins_per_rep
mean_value <- mean(dependent_variable_wins,na.rm=TRUE)
sd_value <- sd(dependent_variable_wins,na.rm=TRUE)
coefficient_dispersion <- (sd_value / mean_value) * 100
coefficient_variation <- (sd_value / mean_value) * 100
print(coefficient_dispersion) #69.62755
## [1] 69.62755
print(coefficient_variation) #69.62755
## [1] 69.62755
#The results of the 40 specific mice (tube_test_40) were: 68.8126 and 68.8126
#In this case, the standard deviation (sd) = variance is equal to the mean, resulting in the coefficients being the same.
#So since my data is count data with no overdispersion, the Poisson family might be suitable indeed.
#When the standard deviation is equal to the mean, it implies that the data values are evenly distributed around the mean (which is clear when visualising the number of wins in a histogram).
#This can occur when the data is symmetrically distributed or when there is a specific relationship between the values in your dataset. The latter is my data (0,1,2,3 or 4 wins)
#I looked at papers doing tube test analyses and one named "social hierarchy position in female mice is associated with plasma corticosterone levels and hypothalamic gene expression" (2019) by Williamson et al. uses a Poisson-lognormal distribution test.
#I want to see if the Poisson-lognormal distribution test fits my data better than regular poisson
#TEST ON SPECIFIC 40 MICE
# Fit Poisson regression model
#poisson_model <- glm(Wins_per_rep ~ . - Name, data = cleaned_tube_test_40, family = "poisson")
poisson_model <- glm(Wins_per_rep ~ Treatments + Cage_ID, data = cleaned_tube_test_40, family = "poisson")
# Fit Poisson-lognormal regression model with log-transformed response
#log_count_var <- log(cleaned_tube_test_40$Wins_per_rep)
#cleaned_tube_test_40$log_wins <- log_count_var
#head(cleaned_tube_test_40)
#poisson_lognormal_model <- glm(log_wins ~ . - Name, data = cleaned_tube_test_40, family = "gaussian") #does not work
#poisson_lognormal_model <- glm(log_wins ~ Treatments + Cage_ID, data = cleaned_tube_test_40, family = "gaussian") #does not work
#cleaned_tube_test_40 <- within(cleaned_tube_test_40, rm(log_wins))
#poisson_lognormal_model <- glm(log(Wins_per_rep) ~ . - Name, data = cleaned_tube_test_40, family = "gaussian") #does not work
#poisson_lognormal_model <- glm(log(Wins_per_rep) ~ Treatments + Cage_ID, data = cleaned_tube_test_40, family = "gaussian") #does not work